"""
Created on Tue Jan 25 15:54:27 2022.

@author: MLechner

Get the variables for the final estimation file.

"""
import pandas as pd
import numpy as np

from mcf import general_purpose as gp

# Input files (estimation sample and RE21 price data) and the output file.
INDATA = 'd:/mlechner/Re21Covid/data/estimation_data_2022_07_07.csv'
PRICEDATA = 'd:/mlechner/Re21Covid/data/RE21_data_2022_07_07.csv'
OUTDATA = 'd:/mlechner/Re21Covid/data/estimation_data_all_2022_07_12.csv'


def _price_ids(quarters, segments):
    """Return '<quarter>_<rent|sale>_<segment>' ids in quarter-major order.

    For every quarter, all 'rent' ids come first, then all 'sale' ids; within
    each of these the segments keep the order in which they are passed.
    """
    return ['_'.join((quarter, deal, segment))
            for quarter in quarters
            for deal in ('rent', 'sale')
            for segment in segments]


# Values of SEL_VAR_COL to extract. The list order fixes the column order of
# the generated variables; note that the segment order differs between the
# 2019 quarters and the 2020/2021 quarters.
SEL_VAR = (
    _price_ids(('Q1_2019', 'Q2_2019', 'Q3_2019', 'Q4_2019'),
               ('office', 'residential', 'retail'))
    + _price_ids(('Q1_2020', 'Q2_2020', 'Q3_2020', 'Q4_2020', 'Q1_2021'),
                 ('residential', 'retail', 'office'))
)

IDENTIFIER = 'ags_9'             # key column used to match the two files
SEL_VAR_COL = 'RE21_price_id'    # column of the price data holding SEL_VAR ids
FEATURE = ['price', 'logprice', 'mean_area', 'mean_construction_year',
           'house_share']

# Names of the new columns.
# Price level / log price: the full id plus 'p' (first character of 'price')
# respectively 'logp' (first four characters of 'logprice').
level_suffix, log_suffix = FEATURE[0][0], FEATURE[1][:4]
new_var_level = [price_id + level_suffix for price_id in SEL_VAR]
new_var_log = [price_id + log_suffix for price_id in SEL_VAR]

# Remaining features: the first 16 characters of the id (for example
# 'Q1_2019_rent_off') followed by a short feature tag.
stems = [price_id[:16] for price_id in SEL_VAR]
new_area = [stem + 'area' for stem in stems]
new_cyear = [stem + 'c_year' for stem in stems]
new_houseshare = [stem + 'h_share' for stem in stems]

# Read the estimation sample and the RE21 price data.
# NOTE: the 'mbcs' codec is only available on Windows; it is kept so that the
# files are decoded exactly as before.
est_data = pd.read_csv(INDATA, encoding='mbcs')
re21_data = pd.read_csv(PRICEDATA, encoding='mbcs')

id_np = est_data[IDENTIFIER].to_numpy()
id_obs = len(id_np)
n_vars = len(SEL_VAR)

# correct coding and other errors
# log_income_pp is set to 0 wherever income_pp is not strictly positive (this
# includes missing income_pp). The result is assigned back to the column:
# calling .where(..., inplace=True) on est_data[...] only changes a temporary
# object once pandas Copy-on-Write is active, so the fix would be lost.
est_data['log_income_pp'] = est_data['log_income_pp'].where(
    est_data['income_pp'] > 0, other=0)

# Build a lookup keyed by (IDENTIFIER, SEL_VAR_COL) once instead of scanning
# the complete price table for every single (row, variable) cell.
source_cols = [FEATURE[0], FEATURE[2], FEATURE[3], FEATURE[4]]
usable = re21_data[IDENTIFIER].notna() & re21_data[SEL_VAR_COL].isin(SEL_VAR)
lookup = re21_data.loc[usable, [IDENTIFIER, SEL_VAR_COL] + source_cols]
lookup = lookup.set_index([IDENTIFIER, SEL_VAR_COL])

# Every requested cell must be backed by exactly one row of the price data.
# Ambiguous or missing matches stop the script with a ValueError (as the
# element-wise assignment did before), but now with a readable message.
duplicate_keys = lookup.index[lookup.index.duplicated()].unique()
if len(duplicate_keys) > 0:
    raise ValueError(
        f'{PRICEDATA}: {len(duplicate_keys)} ({IDENTIFIER}, {SEL_VAR_COL}) '
        f'pairs occur more than once, e.g. {duplicate_keys[:5].tolist()}')

# Requested cells in row-major order: identifier varies slowest, SEL_VAR
# fastest, so a reshape to (id_obs, n_vars) restores the matrix layout.
wanted = pd.MultiIndex.from_product([id_np, SEL_VAR],
                                    names=[IDENTIFIER, SEL_VAR_COL])
positions = lookup.index.get_indexer(wanted)   # -1 marks a pair not found
not_found = positions < 0
if not_found.any():
    raise ValueError(
        f'{PRICEDATA}: {int(not_found.sum())} required ({IDENTIFIER}, '
        f'{SEL_VAR_COL}) pairs are missing, e.g. '
        f'{wanted[not_found][:5].tolist()}')

matrices = {
    column: lookup[column].to_numpy(dtype=float)[positions].reshape(
        id_obs, n_vars)
    for column in source_cols}
add_data_level = matrices[FEATURE[0]]
add_data_area = matrices[FEATURE[2]]
add_data_cyear = matrices[FEATURE[3]]
add_data_houses = matrices[FEATURE[4]]

add_data_level_pd = pd.DataFrame(data=add_data_level, columns=new_var_level)
# Log prices are computed from the price levels (not read from the file).
add_data_log_pd = pd.DataFrame(data=np.log(add_data_level),
                               columns=new_var_log)
add_data_area_pd = pd.DataFrame(data=add_data_area, columns=new_area)
add_data_cyear_pd = pd.DataFrame(data=add_data_cyear, columns=new_cyear)
add_data_houses_pd = pd.DataFrame(data=add_data_houses, columns=new_houseshare)

# Append the new columns to the estimation data (rows are aligned by their
# position in est_data) and write the final file.
final_data = pd.concat([est_data, add_data_level_pd, add_data_log_pd,
                        add_data_area_pd, add_data_cyear_pd,
                        add_data_houses_pd], axis=1)
final_data.to_csv(OUTDATA, index=False)
gp.print_descriptive_stats_file(OUTDATA, 'all', True)

# Echo the generated column names, one line per group, with every name wrapped
# in single quotes and followed by a comma (handy for pasting into a list).
for column_names in (new_var_level, new_var_log, new_area, new_cyear,
                     new_houseshare):
    print(*["'" + column + "'," for column in column_names])

